In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import seaborn as sns
from copairs.map import average_precision
from copairs.map import mean_average_precision
import pycytominer
In [ ]:
def BRD_ID(i):
    if type(i) != float:
        ID = i.split('-')
        if len(ID) >1:
            return ID[1]
In [ ]:
moa_metadata = pd.read_csv('copairs_csv\\LC00009948_MoA_Common_Names.csv')
In [ ]:
#A fucntion to run copairs, save csv and plot the mAP graph
def copairs_batches_earlytimepoint(input_dict, mAP=''):
    
    #defining the parameters for performing copairs
    pert_col = "Metadata_broad_sample"
    control_col = "Metadata_control_type"

    pos_sameby = [pert_col]
    pos_diffby = []

    neg_sameby = []

    #neg_diffby varies based on whether the mAP needs to be calculated with respect to the controls or treatments 
    if mAP == 'Control':
        neg_diffby=[control_col]
    else:
        neg_diffby=[pert_col]
    
    batch_size = 20000
    null_size = 10000

    output_dict = {}
    for i in input_dict:
         
        with open(i, 'rb') as filetype:
            if filetype.read(2) == b'\x1f\x8b':
                df = pd.read_csv(i, compression='gzip')
            else:
                df = pd.read_csv(i)
        name = input_dict.get(i)
        metadata_columns = [ c for c in df.columns if 'Metadata' in c]
        feature_columns = [c for c in df.columns if not 'Metadata' in c]

        meta = df[metadata_columns].copy()

        meta['Metadata_control_type'] = meta['Metadata_control_type'].fillna('trt')
        meta['Metadata_broad_sample'] = meta['Metadata_broad_sample'].fillna('control')
        features = df[feature_columns]
        features = features.dropna(axis=1).values
        result = average_precision(meta, features, pos_sameby, pos_diffby, neg_sameby, neg_diffby, batch_size)
        #result.to_csv(f"{i[:-4]}_Result_NegconNorm_mAP_wrt_{mAP}.csv")

        aggregated_mAP = mean_average_precision(result, sameby=pos_sameby, null_size= 10000, threshold=0.05, seed=2)
        output_dict[name] = aggregated_mAP

        #output_dict[name].to_csv(f"{i[:-4]}_Aggregate_result_NegconNorm_mAP_wrt_{mAP}.csv")



    combined_df = pd.DataFrame()
    for i in output_dict.keys():
        df = output_dict.get(i)
        combined_df = pd.concat([combined_df, df.assign(dataset = i)])
    
    plt.figure(figsize=(13,10))
    sns.boxenplot(data=combined_df,y='dataset', x='mean_average_precision', palette='Set2')
    sns.set_style('whitegrid')
    sns.set(font='sans serif')
    plt.xlabel('Mean Average Precision', fontsize=18)
    plt.ylabel('', fontsize=18)
    plt.yticks(fontsize=18)
    plt.show()
    plt.show('notebook')

    return plt, output_dict
In [ ]:
def cell_count_norm_colorscheme_early(dict1, dict2):
    new_dict = {}
    for i in dict1:  
        raw_df = pd.read_csv(i)
        if 'phasefeatures' in i:
            raw_df = raw_df.rename(columns={'Metadata_broad_sample.1':'Metadata_BRD ID'})
            test_df = raw_df.groupby('Metadata_BRD ID')['Metadata_Count_Cells'].mean().to_frame()
            test_df = test_df.reset_index()
            test_df = test_df.rename(columns={'Metadata_BRD ID':'BRD ID','Metadata_Count_Cells':'Metadata_Count_Cells'+dict1[i]})
            test_df['Metadata_Count_Cells'+dict1[i]+'_norm'] = test_df['Metadata_Count_Cells'+dict1[i]]/100
            new_dict.update({dict1.get(i):test_df})
        else:
            test_df = raw_df.groupby('Metadata_BRD ID')['Metadata_Count_Cells'].mean().to_frame()
            test_df = test_df.reset_index()
            test_df = test_df.rename(columns={'Metadata_BRD ID':'BRD ID','Metadata_Count_Cells':'Metadata_Count_Cells'+dict1[i]})
            test_df['Metadata_Count_Cells'+dict1[i]+'_norm'] = test_df['Metadata_Count_Cells'+dict1[i]]/100
            new_dict.update({dict1.get(i):test_df})

    
    cell_count_df = None
    for key, df in new_dict.items():
        if cell_count_df is None:
            cell_count_df = df.copy()
        else:
            cell_count_df = pd.merge(cell_count_df, df, on='BRD ID')


    output_dict = {}
    for i in dict2:
        raw_df = pd.read_csv(i)
        raw_df = raw_df.rename(columns={'mean_average_precision':'mean_average_precision'+dict2.get(i)})
        subset_df = raw_df[['Metadata_broad_sample', 'mean_average_precision'+dict2.get(i)]]
        output_dict[dict2.get(i)] = subset_df


    combined_df = None 
    for key, df in output_dict.items():
        if combined_df is None:
            combined_df = df.copy()
        else:
            combined_df = pd.merge(combined_df, df, on='Metadata_broad_sample')
            
    
    combined_df['BRD ID'] = combined_df['Metadata_broad_sample'].map(BRD_ID)
    

    combined_df_metadata = pd.merge(combined_df, moa_metadata, on='BRD ID')
    combined_df_metadata_cell_count = pd.merge(combined_df_metadata, cell_count_df, on='BRD ID')
    combined_df_metadata_cell_count = combined_df_metadata_cell_count.rename(columns={'mean_average_precisionSaguaro+DRAQ7_4h':'Saguaro+DRAQ7_4h', 'mean_average_precisionSaguaro+DRAQ7_24h':'Saguaro+DRAQ7_24h', 'mean_average_precisionSaguaro_4h':'Saguaro_4h', 'mean_average_precisionSaguaro_24h':'Saguaro_24h', 'mean_average_precisionSaguaro_48h':'Saguaro_48h'})

    plot = go.Figure()
    
    plot.add_trace(go.Scatter(x=combined_df_metadata_cell_count['MoA'], y=combined_df_metadata_cell_count['Saguaro+DRAQ7_4h'],hovertext=combined_df_metadata_cell_count['Common Name'], mode='markers',marker=dict(color=px.colors.qualitative.Set2[2], symbol='triangle-up'), name = 'Saguaro+DRAQ7_4h',marker_opacity =0.5,  marker_size=combined_df_metadata_cell_count['Metadata_Count_CellsSaguaro+DRAQ7_4h_norm']))
    plot.add_trace(go.Scatter(x=combined_df_metadata_cell_count['MoA'], y=combined_df_metadata_cell_count['Saguaro+DRAQ7_24h'],hovertext=combined_df_metadata_cell_count['Common Name'], mode='markers',marker=dict(color=px.colors.qualitative.Set2[2], symbol='square'), name = 'Saguaro+DRAQ7_24h',marker_opacity =0.5, marker_size=combined_df_metadata_cell_count['Metadata_Count_CellsSaguaro+DRAQ7_24h_norm']))
    plot.add_trace(go.Scatter(x=combined_df_metadata_cell_count['MoA'], y=combined_df_metadata_cell_count['Saguaro_4h'],hovertext=combined_df_metadata_cell_count['Common Name'], mode='markers',marker=dict(color=px.colors.qualitative.Set2[3], symbol='triangle-up'), name = 'Saguaro_4h',marker_opacity =0.5, marker_size=combined_df_metadata_cell_count['Metadata_Count_CellsSaguaro_4h_norm']))
    plot.add_trace(go.Scatter(x=combined_df_metadata_cell_count['MoA'], y=combined_df_metadata_cell_count['Saguaro_24h'],hovertext=combined_df_metadata_cell_count['Common Name'], mode='markers',marker=dict(color=px.colors.qualitative.Set2[3], symbol='square'), name = 'Saguaro_24h',marker_opacity =0.5, marker_size=combined_df_metadata_cell_count['Metadata_Count_CellsSaguaro_24h_norm']))
    plot.add_trace(go.Scatter(x=combined_df_metadata_cell_count['MoA'], y=combined_df_metadata_cell_count['Saguaro_48h'],hovertext=combined_df_metadata_cell_count['Common Name'], mode='markers',marker=dict(color=px.colors.qualitative.Set2[3], symbol='pentagon'), name = 'Saguaro_48h',marker_opacity =0.5, marker_size=combined_df_metadata_cell_count['Metadata_Count_CellsSaguaro_48h_norm']))

    

    plot.update_layout(height=1000,width=2000, font_family='sans serif', font=dict(size=18, color='Black'), boxmode='group',yaxis_title = 'Mean average precision',  legend=dict(yanchor="top",y=0.99,xanchor="left",x=0.01))
    plot.update_layout({'plot_bgcolor':'rgba(0,0,0,0)'})
    plot.update_xaxes(tickangle=90, categoryorder='total ascending')
    plot.update_traces(marker_sizemin=10, marker_sizemode='area',marker_sizeref=1)
    plot.update_layout(legend=dict(itemsizing="constant"))
    plot.update_xaxes(linecolor='black')
    plot.update_yaxes(linecolor='black')
    #plot.update_layout(paper_bgcolor='rgba(0, 0, 0, 0)')
    plot.show('notebook')

    return plot, combined_df_metadata_cell_count

Early time point analysis - Batch 6 and 8¶

Batch 6:

  • BR00122248 4h and 24h
  • Stained with Saguaro, DRAQ7 and Cas3/7

Batch 8:

  • BR00122249 4h and 24h
  • Saguaro only
In [ ]:
dict_to_load_feature_selected_early_timepoint_48h = {'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122248_normalized_negcon_wo_phasefeatures.csv':'Saguaro+DRAQ7_4h',
                                 'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122248_24h_normalized_negcon_wo_phasefeatures.csv':'Saguaro+DRAQ7_24h',
                                 'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122249_4h_normalized_negcon_wo_phasefeatures.csv':'Saguaro_4h',
                                 'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122249_24h_normalized_negcon_wo_phasefeatures.csv':'Saguaro_24h',
                                 'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\saguaro_48h_fixedcell_data_normalized_negcon_wo_phasefeatures_wo_DNAfeatures.csv':'Saguaro_48h'
}

Mean average precision (mAP) with respect to Controls¶

  • Overall mAP values
In [ ]:
mAP_earlytimepoint_plot_48h, mAP_earlytimepoint_dict_48h = copairs_batches_earlytimepoint(dict_to_load_feature_selected_early_timepoint_48h, mAP='Control')
C:\Users\ssivagur\AppData\Local\Temp\ipykernel_35644\2069034308.py:56: FutureWarning:



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.


mAP values plotted based on MOA (Controls)¶

In [ ]:
earlytime_point_result_csv = {'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122248_normalized_negcon_wo_phasefeatures_Result_NegconNorm_mAP_wrt_Control.csv':'Saguaro+DRAQ7_4h',
                              'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122248_24h_normalized_negcon_wo_phasefeatures_Result_NegconNorm_mAP_wrt_Control.csv':'Saguaro+DRAQ7_24h',
                              'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122249_4h_normalized_negcon_wo_phasefeatures_Result_NegconNorm_mAP_wrt_Control.csv':'Saguaro_4h',
                              'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122249_24h_normalized_negcon_wo_phasefeatures_Result_NegconNorm_mAP_wrt_Control.csv':'Saguaro_24h',
                              'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\saguaro_48h_fixedcell_data_normalized_negcon_wo_phasefeatures_wo_DNAfeatures_Result_NegconNorm_mAP_wrt_Control.csv':'Saguaro_48h'

}
In [ ]:
earlytime_point_aggregate_csv = {'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122248_normalized_negcon_wo_phasefeatures_Aggregate_Result_NegconNorm_mAP_wrt_Control.csv':'Saguaro+DRAQ7_4h',
                              'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122248_24h_normalized_negcon_wo_phasefeatures_Aggregate_Result_NegconNorm_mAP_wrt_Control.csv':'Saguaro+DRAQ7_24h',
                              'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122249_4h_normalized_negcon_wo_phasefeatures_Aggregate_Result_NegconNorm_mAP_wrt_Control.csv':'Saguaro_4h',
                              'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122249_24h_normalized_negcon_wo_phasefeatures_Aggregate_Result_NegconNorm_mAP_wrt_Control.csv':'Saguaro_24h',
                              'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\saguaro_48h_fixedcell_data_normalized_negcon_wo_phasefeatures_wo_DNAfeatures_Aggregate_result_NegconNorm_mAP_wrt_Control.csv':'Saguaro_48h'

}
In [ ]:
moA_control_earlytime_point_plot, df_earlytime_point_controls = cell_count_norm_colorscheme_early(earlytime_point_result_csv,earlytime_point_aggregate_csv)

Mean average precision (mAP) with respect to treatments¶

In [ ]:
mAP_earlytimepoint_trmt_plot_48h, mAP_earlytimepoint_trmt_dict_48h = copairs_batches_earlytimepoint(dict_to_load_feature_selected_early_timepoint_48h, mAP='Treatment')
C:\Users\ssivagur\AppData\Local\Temp\ipykernel_35644\2069034308.py:56: FutureWarning:



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.


mAP values plotted based on MOA (Treatments)¶

In [ ]:
earlytime_point_result_csv_trmt = {'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122248_normalized_negcon_wo_phasefeatures_Result_NegconNorm_mAP_wrt_Treatment.csv':'Saguaro+DRAQ7_4h',
                              'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122248_24h_normalized_negcon_wo_phasefeatures_Result_NegconNorm_mAP_wrt_Treatment.csv':'Saguaro+DRAQ7_24h',
                              'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122249_4h_normalized_negcon_wo_phasefeatures_Result_NegconNorm_mAP_wrt_Treatment.csv':'Saguaro_4h',
                              'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122249_24h_normalized_negcon_wo_phasefeatures_Result_NegconNorm_mAP_wrt_Treatment.csv':'Saguaro_24h',
                              'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\saguaro_48h_fixedcell_data_normalized_negcon_wo_phasefeatures_wo_DNAfeatures_Result_NegconNorm_mAP_wrt_Treatment.csv':'Saguaro_48h'

}
In [ ]:
earlytime_point_aggregate_csv_trmt = {'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122248_normalized_negcon_wo_phasefeatures_Aggregate_Result_NegconNorm_mAP_wrt_Treatment.csv':'Saguaro+DRAQ7_4h',
                              'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122248_24h_normalized_negcon_wo_phasefeatures_Aggregate_Result_NegconNorm_mAP_wrt_Treatment.csv':'Saguaro+DRAQ7_24h',
                              'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122249_4h_normalized_negcon_wo_phasefeatures_Aggregate_Result_NegconNorm_mAP_wrt_Treatment.csv':'Saguaro_4h',
                              'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\BR00122249_24h_normalized_negcon_wo_phasefeatures_Aggregate_Result_NegconNorm_mAP_wrt_Treatment.csv':'Saguaro_24h',
                              'c:\\Users\\ssivagur\\Documents\\Projects\\Cell_painting_projects\\CDoT\\Batch3_analysis_redo\\UpdatedCopairsVersion\\EarlyTimePointProfiles\\saguaro_48h_fixedcell_data_normalized_negcon_wo_phasefeatures_wo_DNAfeatures_Aggregate_result_NegconNorm_mAP_wrt_Treatment.csv':'Saguaro_48h'

}
In [ ]:
moA_treatment_earlytime_point_plot, df_earlytime_point_treatment = cell_count_norm_colorscheme_early(earlytime_point_result_csv_trmt,earlytime_point_aggregate_csv_trmt)
In [ ]: